# Heavy vs. light flowers # Callin Switzer # 12 October 2016 # update 8 Dec 2017 # # Compares the bees' frequency when they are buzzing on # heavy (metal added) vs. light flowers # Note: all pores were glued shut
#install packages
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
packages <- c("ggplot2", "reshape2", 'lme4', 'sjPlot', "multcomp", "plyr", "effects")
ipak(packages)
## Loading required package: ggplot2
## Loading required package: reshape2
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: sjPlot
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.10
## Current Matrix version is 1.2.11
## Please re-install 'TMB' from source or restore original 'Matrix' package
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## Loading required package: plyr
## Loading required package: effects
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## ggplot2 reshape2 lme4 sjPlot multcomp plyr effects
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# set ggplot theme
theme_set(theme_classic())
# define data and figure directories
dataDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehData"
figDir <- "/Users/cswitzer/Dropbox/SonicationBehavior/SonBehFigs"
# print system info
print(paste("last run ", Sys.time()))
## Warning in as.POSIXlt.POSIXct(x, tz): unknown timezone 'zone/tz/2017c.1.0/
## zoneinfo/America/Los_Angeles'
## [1] "last run 2017-12-08 20:32:49"
print(R.version)
## _
## platform x86_64-apple-darwin15.6.0
## arch x86_64
## os darwin15.6.0
## system x86_64, darwin15.6.0
## status
## major 3
## minor 4.2
## year 2017
## month 09
## day 28
## svn rev 73368
## language R
## version.string R version 3.4.2 (2017-09-28)
## nickname Short Summer
new_df = read.csv(file.path(dataDir, "02_HeavyLight_cleaned.csv"))
head(new_df)
## freq amp datetime rewNum
## 1 250 0.17945 2016_09_28__09_59_13_112 1
## 2 300 0.56657 2016_09_28__09_59_14_520 2
## 3 400 0.68371 2016_09_28__09_59_15_030 3
## 4 410 0.80118 2016_09_28__09_59_15_707 4
## 5 370 0.52666 2016_09_28__09_59_16_277 5
## 6 360 0.31541 2016_09_28__09_59_17_096 6
## accFile beeID hive IT
## 1 2016_09_28__09_59_13_112_220_450_test.txt 7 3 3.98
## 2 2016_09_28__09_59_14_520_220_450_test.txt 7 3 3.98
## 3 2016_09_28__09_59_15_030_220_450_test.txt 7 3 3.98
## 4 2016_09_28__09_59_15_707_220_450_test.txt 7 3 3.98
## 5 2016_09_28__09_59_16_277_220_450_test.txt 7 3 3.98
## 6 2016_09_28__09_59_17_096_220_450_test.txt 7 3 3.98
## accFileAndFolder
## 1 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 2 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 3 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 4 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 5 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## 6 Bee7_28Sept2016_Hive3_S_W/2016_09_28__09_58_58_ampFreq.txt
## dateFrozenOrMarked treatment
## 1 28-Sep-16 sham
## 2 28-Sep-16 sham
## 3 28-Sep-16 sham
## 4 28-Sep-16 sham
## 5 28-Sep-16 sham
## 6 28-Sep-16 sham
# show histograms of frequencies
hist(new_df$freq, breaks = seq(215, 455, by = 10))
# Calculate average for each bee and for each treatment
frqMeans <- as.data.frame(tapply(X = new_df$freq, INDEX = list(new_df$beeID, new_df$treatment), mean))
frqMeans$beeID <- row.names(frqMeans)
frqMeans
## sham weighted beeID
## 1 344.3333 347.0833 1
## 10 331.2000 382.5000 10
## 11 411.7391 387.6923 11
## 12 344.8276 339.2500 12
## 14 324.0625 281.6000 14
## 15 367.5000 376.3158 15
## 16 357.0588 386.0606 16
## 17 314.0741 341.6000 17
## 18 366.4706 345.0000 18
## 19 375.0000 379.2308 19
## 2 298.5294 322.9167 2
## 20 359.8148 313.3333 20
## 21 331.3158 310.7143 21
## 22 399.4286 388.2857 22
## 23 348.2759 351.2821 23
## 24 287.7778 273.3333 24
## 25 345.0000 352.3684 25
## 26 369.6000 392.7273 26
## 27 366.6667 350.0000 27
## 28 364.4444 374.1935 28
## 29 291.2500 319.3333 29
## 3 399.0323 388.9655 3
## 30 368.0769 369.0323 30
## 31 273.5294 320.4167 31
## 32 325.6000 246.1538 32
## 4 356.6667 371.3636 4
## 5 370.7692 353.7143 5
## 6 284.2857 326.5714 6
## 7 366.9697 396.3415 7
## 8 333.8462 342.0833 8
## 9 344.4000 384.4444 9
## blue 319.1429 333.8462 blue
## gold 374.5098 365.2174 gold
## orange 309.6875 319.1000 orange
## pink 385.3333 367.5000 pink
## white 319.7674 326.9444 white
# conver to long format for ggplot-ing
frqLong <- melt(frqMeans, id.vars = "beeID", measure.vars = c("sham", "weighted"),
variable.name = "trt", value.name = "frq")
nrow(frqLong)
## [1] 72
ggplot(frqLong, aes(x = trt, y = frq)) +
geom_boxplot() +
geom_line(aes(group = beeID))+
geom_point()
ggplot(frqLong, aes(x = trt, y = frq)) +
geom_boxplot() +
labs(y = "Buzz Frequency (Hz)", x = "Flower treatment")
#ggsave(file.path(figDir, "heavyLight_frq.pdf"), width = 5, height = 4)
ggplot(new_df, aes(y = freq, x = treatment, fill = treatment)) +
geom_violin() +
geom_point(position= position_jitter(width = 0.2, height = 0), alpha = 0.2)+
facet_wrap(~beeID)
# print some descriptive statistics
nrow(new_df)
## [1] 2360
unique(new_df$hive)
## [1] 3 4
# Calculate average amplitude for each bee and for each treatment
ampMeans <- as.data.frame(tapply(X = new_df$amp, INDEX = list(new_df$beeID, new_df$treatment), mean))
ampMeans$beeID <- row.names(ampMeans)
ampMeans
## sham weighted beeID
## 1 0.8076493 0.49916833 1
## 10 0.4927912 0.31361531 10
## 11 0.3515139 0.23277462 11
## 12 0.4037179 0.36835150 12
## 14 0.2853947 0.22527680 14
## 15 0.4459104 0.39965079 15
## 16 0.5093180 0.37277576 16
## 17 0.3923870 0.34797660 17
## 18 0.3210409 0.23870071 18
## 19 0.4441223 0.25072667 19
## 2 0.3571368 0.34654847 2
## 20 0.4820570 0.35996394 20
## 21 0.4547376 0.35084893 21
## 22 0.3291960 0.25122486 22
## 23 0.6451645 0.46830538 23
## 24 0.4207263 0.23366458 24
## 25 0.3740931 0.20392921 25
## 26 0.2856976 0.22539606 26
## 27 0.4034414 0.31427939 27
## 28 0.4550385 0.40624774 28
## 29 0.4407475 0.22762267 29
## 3 0.4986029 0.57203034 3
## 30 0.5540012 0.29531161 30
## 31 0.5814774 0.31036667 31
## 32 0.3561664 0.05094538 32
## 4 0.3544025 0.38607545 4
## 5 0.5224635 0.33527800 5
## 6 0.2548671 0.37360600 6
## 7 0.4302712 0.48336415 7
## 8 0.7275065 0.28751583 8
## 9 0.3945476 0.29620407 9
## blue 0.3237263 0.19929000 blue
## gold 0.3532037 0.21221913 gold
## orange 0.5373041 0.46789970 orange
## pink 0.3114410 0.40220477 pink
## white 0.2293505 0.15642500 white
ampLong <- melt(ampMeans, id.vars = "beeID", measure.vars = c("sham", "weighted"),
variable.name = "trt", value.name = "amp")
ggplot(ampLong, aes(x = trt, y = amp)) +
geom_boxplot() +
geom_point()
ggplot(ampLong, aes(x = trt, y = amp)) +
geom_boxplot() +
geom_point() +
geom_line(aes(group = beeID))
ggplot(ampLong, aes(x = trt, y = amp)) +
geom_boxplot() +
geom_point() +
labs(y = "Buzz Amplitude (V)", x = "Flower treatment")
#ggsave(file.path(figDir, 'heavyLightAmp.pdf'), width = 5, heigh = 4)
nrow(ampLong)
## [1] 72
new_df$hive = as.factor(new_df$hive)
m1 = lmer(freq ~ treatment+IT+ hive + (1|beeID), data = new_df, REML = FALSE)
summary(m1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: freq ~ treatment + IT + hive + (1 | beeID)
## Data: new_df
##
## AIC BIC logLik deviance df.resid
## 25150.1 25184.7 -12569.0 25138.1 2354
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3652 -0.4741 0.1870 0.6837 2.3281
##
## Random effects:
## Groups Name Variance Std.Dev.
## beeID (Intercept) 871 29.51
## Residual 2357 48.55
## Number of obs: 2360, groups: beeID, 36
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 355.250 59.273 5.993
## treatmentweighted 2.614 2.064 1.267
## IT -1.852 14.875 -0.125
## hive4 -4.128 10.909 -0.378
##
## Correlation of Fixed Effects:
## (Intr) trtmnt IT
## trtmntwghtd -0.002
## IT -0.995 -0.017
## hive4 -0.047 0.000 -0.009
# interaction that may be important, based on domain knowledge
m1_1 = lmer(freq ~ treatment*IT+ hive + (1|beeID), data = new_df, REML = FALSE)
summary(m1_1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: freq ~ treatment * IT + hive + (1 | beeID)
## Data: new_df
##
## AIC BIC logLik deviance df.resid
## 25147.4 25187.8 -12566.7 25133.4 2353
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3263 -0.4723 0.1862 0.6953 2.3652
##
## Random effects:
## Groups Name Variance Std.Dev.
## beeID (Intercept) 883.5 29.72
## Residual 2351.6 48.49
## Number of obs: 2360, groups: beeID, 36
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 385.150 61.254 6.288
## treatmentweighted -48.137 23.516 -2.047
## IT -9.417 15.378 -0.612
## hive4 -4.044 10.983 -0.368
## treatmentweighted:IT 12.750 5.885 2.167
##
## Correlation of Fixed Effects:
## (Intr) trtmnt IT hive4
## trtmntwghtd -0.225
## IT -0.995 0.225
## hive4 -0.045 -0.003 -0.010
## trtmntwg:IT 0.225 -0.996 -0.227 0.004
BIC(m1, m1_1) # stay with model 1 (lower BIC)
## df BIC
## m1 6 25184.69
## m1_1 7 25187.77
m2 = lmer(freq ~ hive + IT+ (1|beeID), data = new_df, REML = FALSE)
BIC(m1, m2) # treatment doesn't make the model better
## df BIC
## m1 6 25184.69
## m2 5 25178.53
anova(m1, m2) # p-value for paper
## Data: new_df
## Models:
## m2: freq ~ hive + IT + (1 | beeID)
## m1: freq ~ treatment + IT + hive + (1 | beeID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 5 25150 25178 -12570 25140
## m1 6 25150 25185 -12569 25138 1.6039 1 0.2054
# p-value for hive
m3 = lmer(freq ~ treatment + (1|beeID), data = new_df, REML = FALSE)
anova(m1, m3)
## Data: new_df
## Models:
## m3: freq ~ treatment + (1 | beeID)
## m1: freq ~ treatment + IT + hive + (1 | beeID)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 25146 25169 -12569 25138
## m1 6 25150 25185 -12569 25138 0.1592 2 0.9235
# diagnostics
m1 <- update(m1, .~., REML =TRUE)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: freq ~ treatment + IT + hive + (1 | beeID)
## Data: new_df
##
## REML criterion at convergence: 25115.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3676 -0.4684 0.1873 0.6825 2.3319
##
## Random effects:
## Groups Name Variance Std.Dev.
## beeID (Intercept) 954.3 30.89
## Residual 2357.8 48.56
## Number of obs: 2360, groups: beeID, 36
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 355.173 61.937 5.734
## treatmentweighted 2.614 2.065 1.266
## IT -1.833 15.543 -0.118
## hive4 -4.129 11.398 -0.362
##
## Correlation of Fixed Effects:
## (Intr) trtmnt IT
## trtmntwghtd -0.002
## IT -0.995 -0.016
## hive4 -0.047 0.000 -0.009
plot(m1)
qqnorm(ranef(m1)$beeID[[1]])
qqline(ranef(m1)$beeID[[1]])
sjp.lmer(m1, type = "re", sort = TRUE) # plot random effects to find any outliers
## Plotting random effects...
sjp.lmer(m1, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
# set number of bootstrap replicates for models
nbootSims = 1000
# using hive 5, since it's the one with the most data
# calculate an average IT for prediction
ITmean = mean(tapply(new_df$IT, INDEX = new_df$beeID, FUN = function(x) x[1] ))
pframe <- data.frame(treatment = levels(droplevels(new_df$treatment)),IT = ITmean, hive = factor(4, levels = levels(new_df$hive)), beeID = 99999)
pframe$freq <- 0
pp <- predict(m1, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(m1, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('treatment', "blo", "bhi", "predMean")]
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
g0 <- ggplot(pframe, aes(x=treatment, y=predMean))+
geom_point()+
labs(y = "Sonication frequency (Hz)", x = "Flower treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
theme_classic()
g0
ggsave(plot = g0, filename = file.path(figDir, "HeavyLight_PredsAndCI_freq.pdf"), width =4, height = 3)
First, convert amplitude (originally measured in Volts) to m/s/s, using the accelerometer calibration
# convert amplitude to m/s/s
# conversion factor = 10.17 mv/(m/s/s)
new_df$amp_acc = (new_df$amp * 1000) / 10.17
ma1 = lmer(amp_acc ~ treatment + IT + hive + (1|beeID), data = new_df, REML = FALSE)
ma1_1 = lmer(amp_acc ~ treatment * IT + hive + (1|beeID), data = new_df, REML = FALSE)
BIC(ma1, ma1_1) # interaction is ever so slightly better
## df BIC
## ma1 6 21221.64
## ma1_1 7 21221.31
ma2 <- update(ma1_1, .~. - hive)
BIC(ma1_1, ma2) # hive not important
## df BIC
## ma1_1 7 21221.31
## ma2 6 21214.53
ma3 <- update(ma2, .~., REML = TRUE)
summary(ma2) # final model to report
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: amp_acc ~ treatment + IT + (1 | beeID) + treatment:IT
## Data: new_df
##
## AIC BIC logLik deviance df.resid
## 21179.9 21214.5 -10584.0 21167.9 2354
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6197 -0.6795 -0.1395 0.4918 5.0480
##
## Random effects:
## Groups Name Variance Std.Dev.
## beeID (Intercept) 76.04 8.72
## Residual 443.09 21.05
## Number of obs: 2360, groups: beeID, 36
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 19.272 18.858 1.022
## treatmentweighted -39.453 10.194 -3.870
## IT 5.913 4.740 1.247
## treatmentweighted:IT 7.254 2.551 2.844
##
## Correlation of Fixed Effects:
## (Intr) trtmnt IT
## trtmntwghtd -0.317
## IT -0.996 0.316
## trtmntwg:IT 0.318 -0.996 -0.320
# diagnostics
# diagnostics
plot(ma3)
qqnorm(ranef(ma3)$beeID[[1]])
qqline(ranef(ma3)$beeID[[1]])
sjp.lmer(ma3) # plot random effects to find any outliers
## Plotting random effects...
sjp.lmer(ma3, type = 'fe', sort = TRUE, p.kr = FALSE) # plot fixed effects
## Computing p-values via Wald-statistics approximation (treating t as Wald z).
# set number of bootstrap replicates for models
nbootSims2 = 1000
# using hive 5, since it's the one with the most data
pframe <- data.frame(treatment = levels(droplevels(new_df$treatment)), IT = ITmean, hive = factor(4, levels = levels(new_df$hive)), beeID = 99999)
pframe$amp_acc <- 0
pp <- predict(ma3, newdata = pframe, re.form=NA, type = 'response') # re.form sets all random effects to 0
### Calculate CI's (using bootstrap, not accounting for random effects)
bb2 <- bootMer(ma3, FUN=function(x) predict(x, pframe, re.form=NA, type = 'response'), nsim = nbootSims2)
bb2_se <-apply(bb2$t,2,function(x) quantile(x, probs = c(0.025, 0.975)))
pframe$blo<-bb2_se[1,]
pframe$bhi<-bb2_se[2,]
pframe$predMean <- pp
pframe <- pframe[, c('treatment', "blo", "bhi", "predMean")]
# "Mean and bootstrap CI based on fixed-effects uncertainty ONLY"
# Holding IT = meanIT
print(ITmean)
## [1] 3.965833
ga0 <- ggplot(pframe, aes(x=treatment, y=predMean))+
geom_point()+
labs(y = expression ("Sonication amplitude "(m~s^{-2})), x = "Flower treatment") +
geom_errorbar(aes(ymin = blo, ymax = bhi), width = 0.1)+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = 'none') +
theme_classic() +
annotate(geom="text", x=c(1,2), y=c(0, 0) + 50, label=c("a", "b"),
color="black")
ga0
ggsave(plot = ga0, filename = file.path(figDir, "HeavyLight_PredsAndCI_amp.pdf"), width =4, height = 3)